load libraries

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

load data

years <- 2017:2021
quarters <- 1:4
types <- c("Electric","Gas")

pge_data_raw <- NULL

for(year in years) {
  for(quarter in quarters) {
    for(type in types) {
      
      filename <-
        paste0(
          "pge_data/PGE_",
          year,
          "_Q",
          quarter,
          "_",
          type,
          "UsageByZip.csv"
        )

      # print(filename)
        
      # for Q's that don't exist
      if(!file.exists(filename)) next
      
      temp <- read_csv(filename)
      
      
      # converting to standardize units
      
      if(type == "Electric") {
        temp <-
          temp %>%
          mutate(TOTALKBTU = TOTALKWH * 3.412) %>%
          select(-AVERAGEKWH,-TOTALKWH)
      }
      
      if(type == "Gas") {
        temp <-
          temp %>%
          mutate(TOTALKBTU = TOTALTHM * 99.976) %>%
          select(-AVERAGETHM,-TOTALTHM)
      }

      pge_data_raw <-
        rbind(pge_data_raw, temp)
      
    }
  }
}
## Rows: 7776 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3486 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7819 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3489 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7842 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3491 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (4): ZIPCODE, MONTH, YEAR, AVERAGETHM
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 10455 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4662 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7852 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3499 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7880 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3502 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (4): ZIPCODE, MONTH, YEAR, AVERAGETHM
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7877 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3503 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (4): ZIPCODE, MONTH, YEAR, AVERAGETHM
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7875 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3496 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7870 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3499 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7887 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3499 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7891 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3489 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7874 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3490 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7865 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3493 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7791 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3461 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7784 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3459 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (4): ZIPCODE, MONTH, YEAR, AVERAGETHM
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7771 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3456 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (4): ZIPCODE, MONTH, YEAR, AVERAGETHM
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7580 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3458 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 7611 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3459 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

manipulating data

# make two data objects for plotting later
# one for resdiential, one for commercial
residential_data <-
  filter(
    pge_data_raw, CUSTOMERCLASS %in% 
           c(
             "Elec- Residential", 
             "Gas- Residential"
             )
    )

commercial_data <-
  filter(
    pge_data_raw, CUSTOMERCLASS %in% 
           c(
             "Elec- Commercial", 
             "Gas- Commercial"
             )
    )

# selecting only the columns we want
res_data_to_plot <-
  select(
    residential_data,
    MONTH, YEAR, CUSTOMERCLASS, TOTALKBTU
  )

com_data_to_plot <-
  select(
    commercial_data,
    MONTH, YEAR, CUSTOMERCLASS, TOTALKBTU
  )

# create month index to give us our 54 months
res_data_to_plot <-
  mutate(res_data_to_plot,
         MONTH_INDEX = 12 * (YEAR %% 2017) + MONTH)

com_data_to_plot <-
  mutate(com_data_to_plot,
         MONTH_INDEX = 12 * (YEAR %% 2017) + MONTH)

# grouping data by month, customer class
# res
res_data_to_plot_grouped <-
  group_by(
    res_data_to_plot,
    MONTH_INDEX, CUSTOMERCLASS
  )

# com
com_data_to_plot_grouped <-
  group_by(
    com_data_to_plot,
    MONTH_INDEX, CUSTOMERCLASS
  )

# summarize to sum up by month
res_month_summed <-
  summarise(
    res_data_to_plot_grouped,
    TOTALKBTU = sum(TOTALKBTU, na.rm = T)
  )
## `summarise()` has grouped output by 'MONTH_INDEX'. You can override using the `.groups` argument.
com_month_summed <-
  summarise(
    com_data_to_plot_grouped,
    TOTALKBTU = sum(TOTALKBTU, na.rm = T)
  )
## `summarise()` has grouped output by 'MONTH_INDEX'. You can override using the `.groups` argument.
#left off from textbook 1.7 3/4 down, "You'll notice that.."

plotting

# residential
res_plot <-
  res_month_summed %>% 
  ggplot() +
  geom_bar(
    aes(
      x = MONTH_INDEX,
      y = TOTALKBTU,
      fill = CUSTOMERCLASS
    ),
    stat = "identity",
    position = "stack"
  ) +
  labs(
    x = "Month",
    y = "kBtu",
    title = "Residential Energy Use 2017-2021",
    fill = "Energy Type"
  )

res_plot %>% ggplotly()
# commercial
com_plot <-
  com_month_summed %>% 
  ggplot() +
  geom_bar(
    aes(
      x = MONTH_INDEX,
      y = TOTALKBTU,
      fill = CUSTOMERCLASS
    ),
    stat = "identity",
    position = "stack"
  ) +
  labs(
    x = "Month",
    y = "kBtu",
    title = "Commercial Energy Use 2017-2021",
    fill = "Energy Type"
  )

com_plot %>% ggplotly()